import re
import sys, os

# Page files are named "<digits>-<title>.page".  The dot is escaped and the
# pattern is anchored at the end so that only real ".page" names qualify.
_PAGE_NAME_RE = re.compile(r"\d+-(.*)\.page$")

# A third-column cell of interest starts with '#' and contains "[[title]]"
# (presumably a wiki-style redirect -- confirm against the table's producer).
# Both ".*" are greedy, so a cell holding several links does not necessarily
# yield the first one; that matching behaviour is kept as it was.
_LINK_RE = re.compile(r"#.*\[\[(.*)\]\]")


def _load_titles(pages_dir):
    """Return the set of titles encoded in the file names found in pages_dir.

    Entries whose name does not look like "<digits>-<title>.page" are
    skipped instead of aborting the whole run.
    """
    titles = set()
    for name in os.listdir(pages_dir):
        m = _PAGE_NAME_RE.match(name)
        if m:
            titles.add(m.group(1))
    return titles


def _read_synonyms(table_path, titles):
    """Return the sorted (synonym, title) pairs found in a tab-separated file.

    A line contributes a pair when it has at least three cells, its third
    cell matches the "#...[[title]]" pattern and the title is in `titles`.
    The synonym is taken from the second cell.
    """
    synonyms = []
    # The context manager closes the file even if parsing raises.
    with open(table_path) as table:
        for line in table:
            cells = line.split('\t')
            if len(cells) < 3:
                continue
            m = _LINK_RE.match(cells[2])
            if not m:
                continue
            title = m.group(1)
            if title in titles:
                synonyms.append((cells[1], title))
    synonyms.sort()
    return synonyms


def main(argv=None):
    """Print "synonym<TAB>title" lines for every synonym of a known page.

    argv[1] is the directory holding the page files and argv[2] is the
    tab-separated table to scan; argv defaults to sys.argv.

    Returns 0 on success, or 2 (after writing a usage line to stderr) when
    either argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else "synonyms"
        sys.stderr.write("usage: %s PAGES_DIR TABLE_FILE\n" % prog)
        return 2

    titles = _load_titles(argv[1])
    for synonym, title in _read_synonyms(argv[2], titles):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s\t%s" % (synonym, title))
    return 0


if __name__ == "__main__":
    sys.exit(main())


